libname Input "Input Data";
libname Output "Output Data";
data _null_;
	x 'cd Input Data';
run;

proc import datafile=".\BoardEx_ListingMatch.csv"
			dbms=dlm
			out=BX_ListingMatch
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 41000;
run;


data Contracts;
	infile '.\GovContracts_2000_2020.csv' delimiter = ',' MISSOVER DSD lrecl = 32767 firstobs = 2;
	informat contract_transaction_unique_key $200.;
	informat contract_award_unique_key $200.;
	informat award_id_piid $200.;
	informat modification_number $200.;
	informat transaction_number BEST32.;
	informat parent_award_id_piid $200.;
	informat parent_award_modification_number $200.;
	informat federal_action_obligation BEST32.;
	informat action_date YYMMDD10.;
	informat awarding_agency_code $200.;
	informat awarding_agency_name $200.;
	informat awarding_sub_agency_code $200.;
	informat awarding_sub_agency_name $200.;
	informat recipient_duns $200.;
	informat recipient_name $200.;
	informat recipient_parent_duns $200.;
	informat recipient_parent_name $200.;
	informat recipient_address_line_1 $200.;
	informat recipient_address_line_2 $200.;
	informat recipient_city_name $200.;
	informat recipient_county_name $200.;
	informat recipient_state_code $200.;
	informat recipient_zip_4_code $200.;
	informat award_type_code $200.;
	informat award_type $200.;
	informat type_of_contract_pricing_code $200.;
	informat naics_code BEST32.;
	informat number_of_offers_received BEST32.;
	
	format contract_transaction_unique_key $200.;
	format contract_award_unique_key $200.;
	format award_id_piid $20.;
	format modification_number $20.;
	format transaction_number BEST12.;
	format parent_award_id_piid $20.;
	format parent_award_modification_number $20.;
	format federal_action_obligation BEST12.;
	format action_date YYMMDD10.;
	format awarding_agency_code $20.;
	format awarding_agency_name $200.;
	format awarding_sub_agency_code $20.;
	format awarding_sub_agency_name $200.;
	format recipient_duns $20.;
	format recipient_name $200.;
	format recipient_parent_duns $20.;
	format recipient_parent_name $200.;
	format recipient_address_line_1 $200.;
	format recipient_address_line_2 $200.;
	format recipient_city_name $50.;
	format recipient_county_name $50.;
	format recipient_state_code $20.;
	format recipient_zip_4_code $20.;
	format award_type_code $20.;
	format award_type $50.;
	format type_of_contract_pricing_code $20.;
	format naics_code BEST12.;
	format number_of_offers_received BEST12.;

    input
		contract_transaction_unique_key $
		contract_award_unique_key $
		award_id_piid $
		modification_number $
		transaction_number
		parent_award_id_piid $
		parent_award_modification_number $
		federal_action_obligation
		action_date
		awarding_agency_code $
		awarding_agency_name $
		awarding_sub_agency_code $
		awarding_sub_agency_name $
		recipient_duns $
		recipient_name $
		recipient_parent_duns $
		recipient_parent_name $
		recipient_address_line_1 $
		recipient_address_line_2 $
		recipient_city_name $
		recipient_county_name $
		recipient_state_code $
		recipient_zip_4_code $
		award_type_code $
		award_type $
		type_of_contract_pricing_code $
		naics_code
		number_of_offers_received;
run;

/* Clean unique contracts */

data Contracts_Stats;
	set Contracts (keep = award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code modification_number transaction_number
							federal_action_obligation action_date number_of_offers_received type_of_contract_pricing_code);
run;

proc sort data = Contracts_Stats; by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code type_of_contract_pricing_code; run;

proc means data = Contracts_Stats noprint;
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code;
	var action_date;
	output out = First_Action min = first_action_date;
run;

data Contracts_Stats;
	merge Contracts_Stats First_Action (drop = _:);
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code;
	Reneg = 0;
	if action_date - first_action_date > 365 then Reneg = 1;
run;

proc delete data = First_Action; run;

proc means data = Contracts_Stats (where = (Reneg = 0)) noprint;
	by award_id_piid parent_award_id_piid awarding_agency_code awarding_sub_agency_code type_of_contract_pricing_code;
	var federal_action_obligation;
	output out = Unique_Contracts sum = total_obligation min(action_date) = action_date max(number_of_offers_received) = n_offers;
run;

proc delete data = Contracts_Stats; run;

data Unique_Contracts;
	set Unique_Contracts;
	if total_obligation  = . then delete;
	if total_obligation  < 0 then delete;
	if n_offers = 999 then n_offers = .;

	Fixed_Price = 0;
	if type_of_contract_pricing_code = "A" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "B" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "J" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "K" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "L" then Fixed_Price = 1;
	if type_of_contract_pricing_code = "M" then Fixed_Price = 1;

	Cost_Plus = 0;
	if type_of_contract_pricing_code = "R" then Cost_Plus = 1;
	if type_of_contract_pricing_code = "U" then Cost_Plus = 1;
	if type_of_contract_pricing_code = "V" then Cost_Plus = 1;
	if type_of_contract_pricing_code = "Y" then Cost_Plus = 1;

	Incentives = 0;
	if type_of_contract_pricing_code = "L" then Incentives = 1;
	if type_of_contract_pricing_code = "M" then Incentives = 1;
	if type_of_contract_pricing_code = "R" then Incentives = 1;
	if type_of_contract_pricing_code = "U" then Incentives = 1;
	if type_of_contract_pricing_code = "V" then Incentives = 1;
run;

proc import datafile=".\Open\Agency_BX_Map.csv"
			dbms=dlm
			out=Agencies
			replace;
		delimiter=';';
		getnames=yes;
		guessingrows = 700;
run;
proc sort data = Agencies (keep = awarding_agency_code awarding_sub_agency_code agcy_bx agencyid_contracts where = (/*agcy_bx ~= . and*/ AgencyID ~= .) rename = (agencyid_contracts = AgencyID)) nodupkey;
	by awarding_agency_code awarding_sub_agency_code AgencyID;
run;

proc sort data = Unique_Contracts; by awarding_agency_code awarding_sub_agency_code; run;
proc sort data = Agencies out = Agencies_Contracts (where = (AgencyID ~= .)) nodupkey; by awarding_agency_code awarding_sub_agency_code; run;

data Unique_Contracts;
	merge Unique_Contracts (in = a) Agencies_Contracts (in = b);
	by awarding_agency_code awarding_sub_agency_code;
	if a = 1 and b = 1;
run;


/* Clean firm info */

data Contracts_Comp;
	set Contracts (keep = award_id_piid parent_award_id_piid awarding_sub_agency_code recipient_name recipient_parent_name naics_code);
run;

data Contracts_Comp;
	set Contracts_Comp;
	COMPANYNAME_GC = recipient_parent_name;
	if COMPANYNAME_GC = "" then COMPANYNAME_GC = recipient_name;	
	if COMPANYNAME_GC = "" then delete;
	Comp_Name = prxchange('s/\(([^\)]+)\)//i', -1, COMPANYNAME_GC);
	Comp_Name = upcase(Comp_Name);
	Comp_Name = tranwrd(Comp_Name,"LLC","");
	Comp_Name = tranwrd(Comp_Name,"INCORPORATED","");
	Comp_Name = tranwrd(Comp_Name,"INC","");
	Comp_Name = tranwrd(Comp_Name,"LTD","");
	Comp_Name = tranwrd(Comp_Name,"CORPORATION","");
	Comp_Name = tranwrd(Comp_Name,"CORPORATI","");
	Comp_Name = tranwrd(Comp_Name,"CORP","");
	Comp_Name = tranwrd(Comp_Name,"COMPANY","");
	Comp_Name = tranwrd(Comp_Name," CO","");
	Comp_Name = tranwrd(Comp_Name,"LP","");
	Comp_Name = tranwrd(Comp_Name,"INTERNATIONAL","");
	Comp_Name = tranwrd(Comp_Name,"INTL","");
	Comp_Name = tranwrd(Comp_Name,"HOLDING","");
	Comp_Name = tranwrd(Comp_Name,"HOLDINGS","");
	Comp_Name = tranwrd(Comp_Name," AND ","");
	Comp_Name = compress(Comp_Name,"ABCDEFGHIJKLMNOPQRSTUVWXYZ012345789","kis");
	Comp_Name = compress(Comp_Name);
run;

proc import datafile=".\CIQ_Contracts_FirmMap.csv"
			dbms=dlm
			out=CIQ_Contracts_Merge
			replace;
		delimiter=',';
		getnames=yes;
run;
proc import datafile=".\BX_CIQ_Merge_NAICS5.csv"
			dbms=dlm
			out=NAICS1
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;
proc import datafile=".\CIQ_NAICS.csv"
			dbms=dlm
			out=NAICS2
			replace;
		delimiter=',';
		getnames=yes;
		guessingrows = 100000;
run;

proc sql;
	create table CIQ_Contracts_Merge2 as
		select * from CIQ_Contracts_Merge a, NAICS1 b, NAICS2 (keep = CompanyID) c
			where a.CompanyID = b.CompanyID and b.CompanyID = c.CompanyID;
quit;

proc sql;
	create table Contracts_Comp2 as
		select * from Contracts_Comp a, CIQ_Contracts_Merge2 (drop = CompanyID) b
			where a.Comp_Name = b.Comp_Name;
quit;

proc delete data = CIQ_Contracts_Merge;
proc delete data = CIQ_Contracts_Merge2;
proc delete data = Contracts;
proc delete data = Contracts_Comp;
proc delete data = NAICS1;
proc delete data = NAICS2; run;

proc sort data = Contracts_Comp2 (drop = recipient_name recipient_parent_name naics_code companyname_gc comp_name rename = (BoardID = CompanyID))nodupkey;
	by award_id_piid parent_award_id_piid awarding_sub_agency_code CompanyID;
run;




/* Merge and finalize unique_contracts with firm info */

proc sort data = Contracts_Comp2; by award_id_piid parent_award_id_piid awarding_sub_agency_code; run;
proc sort data = Unique_Contracts; by award_id_piid parent_award_id_piid awarding_sub_agency_code; run;

data Unique_Contracts;
	merge Unique_Contracts (in = a drop = _:) Contracts_Comp2 (in = b);
	by award_id_piid parent_award_id_piid awarding_sub_agency_code;
	if a = 1 and b = 1;
	Year = year(action_date);
run;

proc sort data = Unique_Contracts; by CompanyID AgencyID Year; run;
proc sort data = BX_ListingMatch; by CompanyID; run;

data Unique_Contracts;
	merge Unique_Contracts (in = a) BX_ListingMatch (in = b);
	by CompanyID;
	if a = 1;
	if b = 1 then CompanyID = CompanyID_New;
	drop CompanyID_New;
run; 

proc sort data = Unique_Contracts; by CompanyID AgencyID Year; run;

proc means data = Unique_Contracts noprint;
	by CompanyID AgencyID Year;
	var total_obligation n_offers Fixed_Price Cost_Plus Incentives;
	output out = Output.Contracts_Year sum = total_obligation n_offers Fixed_Price Cost_Plus Incentives n = n_Contracts;
run;
